In [1]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

from keras.models import Sequential, model_from_json
from keras.layers import Dense, Dropout, Activation, Flatten, Convolution2D, MaxPooling2D
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator

import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
%matplotlib inline 
tf.logging.set_verbosity(tf.logging.ERROR)


Using TensorFlow backend.

In [2]:
tf.set_random_seed(2017)
np.random.seed(2017)

1. Data Augmentation

  • image rotation (rotation_range = 10, 20, 30); a small preview sketch follows the augmentation cell below
  • image width shift (width_shift_range = 0.1, 0.2, 0.3)

In [3]:
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# test data
test_images = mnist.test.images.reshape(10000, 28, 28, 1)
test_labels = mnist.test.labels[:]

augmentation_size = 440000  # upper bound on augmented samples drawn from each generator
images = np.concatenate((mnist.train.images.reshape(55000, 28, 28, 1), mnist.validation.images.reshape(5000, 28, 28, 1)), axis=0)
labels = np.concatenate((mnist.train.labels, mnist.validation.labels), axis=0)

datagen_list = [
                ImageDataGenerator(rotation_range=10),
                ImageDataGenerator(rotation_range=20),
                ImageDataGenerator(rotation_range=30),
                ImageDataGenerator(width_shift_range=0.1),
                ImageDataGenerator(width_shift_range=0.2),
                ImageDataGenerator(width_shift_range=0.3),
               ]

# Draw one augmented batch from each generator setting and append it to the training set.
for datagen in datagen_list:
    datagen.fit(images)  # only needed when feature-wise normalization or ZCA whitening is enabled
    for image, label in datagen.flow(images, labels, batch_size=augmentation_size, shuffle=True, seed=2017):
        images = np.concatenate((images, image), axis=0)
        labels = np.concatenate((labels, label), axis=0)
        break  # one batch per generator


Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
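
A quick way to eyeball an augmentation setting is to draw a single batch from one generator and plot it. This is a minimal sketch, not part of the original run; preview_gen is a hypothetical name, and the snippet assumes images and labels from the cell above plus the matplotlib setup from the first cell.

# Preview sketch: show nine rotated digits from one augmentation setting.
preview_gen = ImageDataGenerator(rotation_range=10)
for batch, _ in preview_gen.flow(images[:9], labels[:9], batch_size=9, shuffle=False):
    for i in range(9):
        plt.subplot(3, 3, i + 1)
        plt.imshow(batch[i].reshape(28, 28), cmap='gray')
        plt.axis('off')
    break  # a single batch is enough for a preview
plt.show()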

In [4]:
print('Train Data Set :', images.shape)
print('Test Data Set  :', test_images.shape)


Train Data Set : (1800000, 28, 28, 1)
Test Data Set  : (10000, 28, 28, 1)

2. Training

Architecture

  1. Convolution * 2 + MaxPool + Dropout
  2. Convolution * 2 + MaxPool + Dropout
  3. Convolution + MaxPool + Dropout
  4. Dense + Dropout
  5. Dense + Dropout
  6. Output
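
For reference, 'same' padding keeps the convolution outputs at the input size, so only the three 2x2 max-pool layers shrink the feature maps:

$28 \rightarrow 14 \rightarrow 7 \rightarrow 3$, and the final $3 \times 3 \times 128 = 1152$ values are what Flatten() feeds into the first Dense(1024) layer.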

Model Highlights

  1. Ensemble of three small VGG-style networks
  2. Three convolution kernel sizes (Model 1 = 3x3, Model 2 = 5x5, Model 3 = 7x7); a shared builder sketch follows this list
  3. ELU activation function
  4. Adam optimizer (learning rate = 0.0001)
  5. Data augmentation (image rotation, image width shift)
  6. Batch shuffling during training
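
The three models below share one architecture and differ only in kernel size. As a refactoring sketch (not part of the original run), a hypothetical helper build_model(k) could construct all of them; the notebook instead spells each model out explicitly.

from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Convolution2D, MaxPooling2D

def build_model(k):
    # Same stack as Model 1/2/3 below, parameterized by the square kernel size k.
    return Sequential([Convolution2D(filters=64, kernel_size=(k, k), padding='same', activation='elu', input_shape=(28, 28, 1)),
                       Convolution2D(filters=128, kernel_size=(k, k), padding='same', activation='elu'),
                       MaxPooling2D(pool_size=(2, 2)),
                       Dropout(0.5),
                       Convolution2D(filters=128, kernel_size=(k, k), padding='same', activation='elu'),
                       Convolution2D(filters=128, kernel_size=(k, k), padding='same', activation='elu'),
                       MaxPooling2D(pool_size=(2, 2)),
                       Dropout(0.5),
                       Convolution2D(filters=128, kernel_size=(k, k), padding='same', activation='elu'),
                       MaxPooling2D(pool_size=(2, 2)),
                       Dropout(0.5),
                       Flatten(),
                       Dense(1024, activation='elu'),
                       Dropout(0.5),
                       Dense(1024, activation='elu'),
                       Dropout(0.5),
                       Dense(10, activation='softmax'),
                       ])

# Usage sketch: model1, model2, model3 = build_model(3), build_model(5), build_model(7)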

Model 1


In [5]:
model1 = Sequential([Convolution2D(filters=64, kernel_size=(3, 3), padding='same', activation='elu', input_shape=(28, 28, 1)), 
                     Convolution2D(filters=128, kernel_size=(3, 3), padding='same', activation='elu'),
                     MaxPooling2D(pool_size=(2, 2)),
                     Dropout(0.5),
                     Convolution2D(filters=128, kernel_size=(3, 3), padding='same', activation='elu'),
                     Convolution2D(filters=128, kernel_size=(3, 3), padding='same', activation='elu'),
                     MaxPooling2D(pool_size=(2, 2)),
                     Dropout(0.5),
                     Convolution2D(filters=128, kernel_size=(3, 3), padding='same', activation='elu'),
                     MaxPooling2D(pool_size=(2, 2)),
                     Dropout(0.5),
                     Flatten(),
                     Dense(1024, activation='elu'),
                     Dropout(0.5),
                     Dense(1024, activation='elu'),
                     Dropout(0.5),
                     Dense(10, activation='softmax'),
                     ])
model1.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
model1.fit(images, labels, batch_size=256, epochs=20, shuffle=True, verbose=1, validation_data=(test_images, test_labels))


Train on 1800000 samples, validate on 10000 samples
Epoch 1/20
1800000/1800000 [==============================] - 425s - loss: 0.2764 - acc: 0.9133 - val_loss: 0.0293 - val_acc: 0.9899
Epoch 2/20
1800000/1800000 [==============================] - 424s - loss: 0.1080 - acc: 0.9665 - val_loss: 0.0230 - val_acc: 0.9926
Epoch 3/20
1800000/1800000 [==============================] - 434s - loss: 0.0770 - acc: 0.9760 - val_loss: 0.0168 - val_acc: 0.9949
Epoch 4/20
1800000/1800000 [==============================] - 433s - loss: 0.0622 - acc: 0.9805 - val_loss: 0.0160 - val_acc: 0.9954
Epoch 5/20
1800000/1800000 [==============================] - 424s - loss: 0.0524 - acc: 0.9835 - val_loss: 0.0148 - val_acc: 0.9960
Epoch 6/20
1800000/1800000 [==============================] - 424s - loss: 0.0463 - acc: 0.9853 - val_loss: 0.0131 - val_acc: 0.9962
Epoch 7/20
1800000/1800000 [==============================] - 424s - loss: 0.0416 - acc: 0.9869 - val_loss: 0.0145 - val_acc: 0.9962
Epoch 8/20
1800000/1800000 [==============================] - 424s - loss: 0.0382 - acc: 0.9879 - val_loss: 0.0147 - val_acc: 0.9960
Epoch 9/20
1800000/1800000 [==============================] - 423s - loss: 0.0352 - acc: 0.9887 - val_loss: 0.0130 - val_acc: 0.9960
Epoch 10/20
1800000/1800000 [==============================] - 423s - loss: 0.0332 - acc: 0.9895 - val_loss: 0.0136 - val_acc: 0.9966
Epoch 11/20
1800000/1800000 [==============================] - 424s - loss: 0.0314 - acc: 0.9899 - val_loss: 0.0123 - val_acc: 0.9964
Epoch 12/20
1800000/1800000 [==============================] - 423s - loss: 0.0296 - acc: 0.9906 - val_loss: 0.0123 - val_acc: 0.9967
Epoch 13/20
1800000/1800000 [==============================] - 423s - loss: 0.0283 - acc: 0.9910 - val_loss: 0.0143 - val_acc: 0.9964
Epoch 14/20
1800000/1800000 [==============================] - 423s - loss: 0.0274 - acc: 0.9913 - val_loss: 0.0133 - val_acc: 0.9966
Epoch 15/20
1800000/1800000 [==============================] - 423s - loss: 0.0264 - acc: 0.9916 - val_loss: 0.0147 - val_acc: 0.9960
Epoch 16/20
1800000/1800000 [==============================] - 423s - loss: 0.0255 - acc: 0.9918 - val_loss: 0.0142 - val_acc: 0.9970
Epoch 17/20
1800000/1800000 [==============================] - 423s - loss: 0.0248 - acc: 0.9921 - val_loss: 0.0141 - val_acc: 0.9966
Epoch 18/20
1800000/1800000 [==============================] - 423s - loss: 0.0238 - acc: 0.9925 - val_loss: 0.0145 - val_acc: 0.9967
Epoch 19/20
1800000/1800000 [==============================] - 423s - loss: 0.0236 - acc: 0.9925 - val_loss: 0.0152 - val_acc: 0.9967
Epoch 20/20
1800000/1800000 [==============================] - 423s - loss: 0.0231 - acc: 0.9927 - val_loss: 0.0161 - val_acc: 0.9969
Out[5]:
<keras.callbacks.History at 0x22e9aff05f8>

In [6]:
model_json = model1.to_json()
with open("model1.json", "w") as json_file:
    json_file.write(model_json)
model1.save_weights("model1.h5")
print("Saved model to disk")


Saved model to disk
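
As an aside, Keras can also persist the architecture, weights, and optimizer state together in a single HDF5 file via model.save and keras.models.load_model, instead of the JSON-plus-weights pair used here. A minimal sketch (not part of the original run; model1_full.h5 is a hypothetical filename):

from keras.models import load_model

model1.save('model1_full.h5')            # architecture + weights + optimizer state
restored = load_model('model1_full.h5')  # no separate JSON file needed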

Model 2


In [7]:
model2 = Sequential([Convolution2D(filters=64, kernel_size=(5, 5), padding='same', activation='elu', input_shape=(28, 28, 1)), 
                     Convolution2D(filters=128, kernel_size=(5, 5), padding='same', activation='elu'),
                     MaxPooling2D(pool_size=(2, 2)),
                     Dropout(0.5),
                     Convolution2D(filters=128, kernel_size=(5, 5), padding='same', activation='elu'),
                     Convolution2D(filters=128, kernel_size=(5, 5), padding='same', activation='elu'),
                     MaxPooling2D(pool_size=(2, 2)),
                     Dropout(0.5),
                     Convolution2D(filters=128, kernel_size=(5, 5), padding='same', activation='elu'),
                     MaxPooling2D(pool_size=(2, 2)),
                     Dropout(0.5),
                     Flatten(),
                     Dense(1024, activation='elu'),
                     Dropout(0.5),
                     Dense(1024, activation='elu'),
                     Dropout(0.5),
                     Dense(10, activation='softmax'),
                     ])
model2.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
model2.fit(images, labels, batch_size=256, epochs=20, shuffle=True, verbose=1, validation_data=(test_images, test_labels))


Train on 1800000 samples, validate on 10000 samples
Epoch 1/20
1800000/1800000 [==============================] - 607s - loss: 0.2144 - acc: 0.9333 - val_loss: 0.0209 - val_acc: 0.9936
Epoch 2/20
1800000/1800000 [==============================] - 606s - loss: 0.0838 - acc: 0.9742 - val_loss: 0.0170 - val_acc: 0.9947
Epoch 3/20
1800000/1800000 [==============================] - 610s - loss: 0.0570 - acc: 0.9823 - val_loss: 0.0137 - val_acc: 0.9958
Epoch 4/20
1800000/1800000 [==============================] - 613s - loss: 0.0442 - acc: 0.9862 - val_loss: 0.0146 - val_acc: 0.9960
Epoch 5/20
1800000/1800000 [==============================] - 612s - loss: 0.0360 - acc: 0.9887 - val_loss: 0.0128 - val_acc: 0.9964
Epoch 6/20
1800000/1800000 [==============================] - 613s - loss: 0.0313 - acc: 0.9903 - val_loss: 0.0139 - val_acc: 0.9960
Epoch 7/20
1800000/1800000 [==============================] - 612s - loss: 0.0276 - acc: 0.9912 - val_loss: 0.0108 - val_acc: 0.9964
Epoch 8/20
1800000/1800000 [==============================] - 611s - loss: 0.0251 - acc: 0.9921 - val_loss: 0.0132 - val_acc: 0.9962
Epoch 9/20
1800000/1800000 [==============================] - 610s - loss: 0.0230 - acc: 0.9927 - val_loss: 0.0120 - val_acc: 0.9974
Epoch 10/20
1800000/1800000 [==============================] - 610s - loss: 0.0214 - acc: 0.9932 - val_loss: 0.0110 - val_acc: 0.9972
Epoch 11/20
1800000/1800000 [==============================] - 610s - loss: 0.0201 - acc: 0.9937 - val_loss: 0.0125 - val_acc: 0.9966
Epoch 12/20
1800000/1800000 [==============================] - 610s - loss: 0.0188 - acc: 0.9941 - val_loss: 0.0123 - val_acc: 0.9972
Epoch 13/20
1800000/1800000 [==============================] - 610s - loss: 0.0183 - acc: 0.9943 - val_loss: 0.0100 - val_acc: 0.9972
Epoch 14/20
1800000/1800000 [==============================] - 610s - loss: 0.0174 - acc: 0.9946 - val_loss: 0.0138 - val_acc: 0.9968
Epoch 15/20
1800000/1800000 [==============================] - 610s - loss: 0.0166 - acc: 0.9948 - val_loss: 0.0136 - val_acc: 0.9968
Epoch 16/20
1800000/1800000 [==============================] - 610s - loss: 0.0160 - acc: 0.9950 - val_loss: 0.0116 - val_acc: 0.9974
Epoch 17/20
1800000/1800000 [==============================] - 610s - loss: 0.0155 - acc: 0.9952 - val_loss: 0.0152 - val_acc: 0.9959
Epoch 18/20
1800000/1800000 [==============================] - 610s - loss: 0.0151 - acc: 0.9953 - val_loss: 0.0139 - val_acc: 0.9968
Epoch 19/20
1800000/1800000 [==============================] - 610s - loss: 0.0148 - acc: 0.9955 - val_loss: 0.0140 - val_acc: 0.9971
Epoch 20/20
1800000/1800000 [==============================] - 610s - loss: 0.0145 - acc: 0.9956 - val_loss: 0.0128 - val_acc: 0.9972
Out[7]:
<keras.callbacks.History at 0x22e9abd20f0>

In [8]:
model_json = model2.to_json()
with open("model2.json", "w") as json_file:
    json_file.write(model_json)
model2.save_weights("model2.h5")
print("Saved model to disk")


Saved model to disk

Model 3


In [9]:
model3 = Sequential([Convolution2D(filters=64, kernel_size=(7, 7), padding='same', activation='elu', input_shape=(28, 28, 1)), 
                     Convolution2D(filters=128, kernel_size=(7, 7), padding='same', activation='elu'),
                     MaxPooling2D(pool_size=(2, 2)),
                     Dropout(0.5),
                     Convolution2D(filters=128, kernel_size=(7, 7), padding='same', activation='elu'),
                     Convolution2D(filters=128, kernel_size=(7, 7), padding='same', activation='elu'),
                     MaxPooling2D(pool_size=(2, 2)),
                     Dropout(0.5),
                     Convolution2D(filters=128, kernel_size=(7, 7), padding='same', activation='elu'),
                     MaxPooling2D(pool_size=(2, 2)),
                     Dropout(0.5),
                     Flatten(),
                     Dense(1024, activation='elu'),
                     Dropout(0.5),
                     Dense(1024, activation='elu'),
                     Dropout(0.5),
                     Dense(10, activation='softmax'),
                     ])
model3.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
model3.fit(images, labels, batch_size=256, epochs=20, shuffle=True, verbose=1, validation_data=(test_images, test_labels))


Train on 1800000 samples, validate on 10000 samples
Epoch 1/20
1800000/1800000 [==============================] - 842s - loss: 0.1917 - acc: 0.9408 - val_loss: 0.0209 - val_acc: 0.9936
Epoch 2/20
1800000/1800000 [==============================] - 840s - loss: 0.0708 - acc: 0.9785 - val_loss: 0.0146 - val_acc: 0.9956
Epoch 3/20
1800000/1800000 [==============================] - 840s - loss: 0.0462 - acc: 0.9858 - val_loss: 0.0117 - val_acc: 0.9967
Epoch 4/20
1800000/1800000 [==============================] - 839s - loss: 0.0341 - acc: 0.9894 - val_loss: 0.0139 - val_acc: 0.9967
Epoch 5/20
1800000/1800000 [==============================] - 839s - loss: 0.0276 - acc: 0.9914 - val_loss: 0.0139 - val_acc: 0.9966
Epoch 6/20
1800000/1800000 [==============================] - 838s - loss: 0.0234 - acc: 0.9927 - val_loss: 0.0145 - val_acc: 0.9964
Epoch 7/20
1800000/1800000 [==============================] - 840s - loss: 0.0205 - acc: 0.9936 - val_loss: 0.0127 - val_acc: 0.9972
Epoch 8/20
1800000/1800000 [==============================] - 843s - loss: 0.0186 - acc: 0.9942 - val_loss: 0.0147 - val_acc: 0.9967
Epoch 9/20
1800000/1800000 [==============================] - 842s - loss: 0.0171 - acc: 0.9948 - val_loss: 0.0154 - val_acc: 0.9967
Epoch 10/20
1800000/1800000 [==============================] - 846s - loss: 0.0156 - acc: 0.9952 - val_loss: 0.0161 - val_acc: 0.9965
Epoch 11/20
1800000/1800000 [==============================] - 852s - loss: 0.0148 - acc: 0.9955 - val_loss: 0.0147 - val_acc: 0.9967
Epoch 12/20
1800000/1800000 [==============================] - 846s - loss: 0.0140 - acc: 0.9958 - val_loss: 0.0142 - val_acc: 0.9971
Epoch 13/20
1800000/1800000 [==============================] - 842s - loss: 0.0132 - acc: 0.9960 - val_loss: 0.0148 - val_acc: 0.9972
Epoch 14/20
1800000/1800000 [==============================] - 841s - loss: 0.0127 - acc: 0.9962 - val_loss: 0.0150 - val_acc: 0.9972
Epoch 15/20
1800000/1800000 [==============================] - 836s - loss: 0.0124 - acc: 0.9964 - val_loss: 0.0145 - val_acc: 0.9971
Epoch 16/20
1800000/1800000 [==============================] - 836s - loss: 0.0120 - acc: 0.9964 - val_loss: 0.0186 - val_acc: 0.9967
Epoch 17/20
1800000/1800000 [==============================] - 851s - loss: 0.0117 - acc: 0.9965 - val_loss: 0.0162 - val_acc: 0.9974
Epoch 18/20
1800000/1800000 [==============================] - 850s - loss: 0.0114 - acc: 0.9967 - val_loss: 0.0170 - val_acc: 0.9974
Epoch 19/20
1800000/1800000 [==============================] - 850s - loss: 0.0115 - acc: 0.9967 - val_loss: 0.0146 - val_acc: 0.9968
Epoch 20/20
1800000/1800000 [==============================] - 849s - loss: 0.0113 - acc: 0.9968 - val_loss: 0.0129 - val_acc: 0.9972
Out[9]:
<keras.callbacks.History at 0x22e9b7c4898>

In [10]:
model_json = model3.to_json()
with open("model3.json", "w") as json_file:
    json_file.write(model_json)
model3.save_weights("model3.h5")
print("Saved model to disk")


Saved model to disk

3. Evaluate


In [11]:
from keras.models import model_from_json
from tensorflow.examples.tutorials.mnist import input_data
from keras.optimizers import Adam
import numpy as np

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# test data
test_images = mnist.test.images.reshape(10000, 28, 28, 1)
test_labels = mnist.test.labels[:]


Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz

In [12]:
# load json and create model
def model_open(name, test_images, test_labels):
    json_file = open(name + '.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model = model_from_json(loaded_model_json)
    # load weights into new model
    loaded_model.load_weights(name + '.h5')
    print("Loaded model from disk")
    loaded_model.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['acc'])
    # class probabilities for the test set; accuracy = fraction of matching argmax predictions
    prob = loaded_model.predict_proba(test_images)
    acc = np.mean(np.equal(np.argmax(prob, axis=1), np.argmax(test_labels, axis=1)))
    print('\nmodel : %s, test accuracy : %.4f\n' % (name, acc))
    return prob, acc

In [13]:
model_1_prob, model_1_acc = model_open('model1', test_images, test_labels)
model_2_prob, model_2_acc = model_open('model2', test_images, test_labels)
model_3_prob, model_3_acc = model_open('model3', test_images, test_labels)


Loaded model from disk
 9888/10000 [============================>.] - ETA: 0s
model : model1, test accuracy : 0.9969

Loaded model from disk
 9952/10000 [============================>.] - ETA: 0s
model : model2, test accuracy : 0.9972

Loaded model from disk
10000/10000 [==============================] - 2s     

model : model3, test accuracy : 0.9972

4. Final Result (Ensemble)

  • This ensemble gives a higher weight to the model with higher test accuracy; when two models tie, the one with the larger kernel size gets the higher weight (a worked example follows this list).
  • Best accuracy with a single model is $0.9972$
  • Final ensemble accuracy is $0.9980$
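
Concretely, with the accuracies reported above, Model 1 ($0.9969$) ranks last and Models 2 and 3 tie at $0.9972$, so Model 3 (7x7 kernels) takes the largest weight:

$p_{\text{ensemble}} = 1 \cdot p_{\text{model1}} + 2 \cdot p_{\text{model2}} + 3 \cdot p_{\text{model3}}$

and the ensemble prediction for each image is the argmax of $p_{\text{ensemble}}$ over the 10 digit classes.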

In [14]:
def model_ensemble(prob1, acc1, prob2, acc2, prob3, acc3):
    prob_list = [prob1, prob2, prob3]
    acc_list = [acc1, acc2, acc3]
    # Rank the models by test accuracy, breaking ties in favor of the larger kernel size (higher index).
    ranked = sorted(range(len(acc_list)), key=lambda i: (acc_list[i], i))
    # Weighted sum of class probabilities: weight 1 for the weakest model up to 3 for the strongest.
    final_prob = 0
    for weight, idx in enumerate(ranked, start=1):
        final_prob += prob_list[idx] * weight
    final_score = np.mean(np.equal(np.argmax(final_prob, axis=1), np.argmax(test_labels, axis=1)))  # test_labels from the evaluation cell above
    print('Final test accuracy : %.4f' % final_score)

In [15]:
model_ensemble(model_1_prob, model_1_acc, model_2_prob, model_2_acc, model_3_prob, model_3_acc)


Final test accuracy : 0.9980